************************************************************************
* Last change:	07/02/2017		                                       
*************************************************************************

*********** Sample restrictions
* Keep regular employees covered by social security (pers_gr code 101);
* per the original note this also removes trainees. The selector is no
* longer needed afterwards.
keep if pers_gr == 101
drop pers_gr

* Drop spells without a usable firm id. The codes 0 and >= 99999999 are
* treated as missing; the second test is also true for Stata missing
* values, so spells whose id is already missing are removed as well.
drop if bnr_ano_n == 0 | bnr_ano_n >= 99999999

********** Age restriction: keep ages above 15 and up to 65
* (a missing age compares greater than 65 and is therefore dropped)
drop if alter <= 15 | alter > 65

********* Keep occupational status codes 1 to 4
* (per the original note this removes apprentices and part-time workers)
keep if inrange(berufstg, 1, 4)

* Drop spells with a negative or system-missing workplace municipality
keep if ao_gem >= 0 & ao_gem != .

* District (Kreis) code: the municipality code without its last three digits
gen ao_kreis = int(ao_gem/1000)
replace ao_kreis = . if ao_kreis >= 99999 | ao_kreis < 0
replace ao_kreis = 11000 if inlist(ao_kreis, 11100, 11200)  // Berlin

* East indicator: 1 for district codes above 11000, 0 for codes up to and
* including 11000 (so the recoded Berlin districts get 0), missing when
* the district code is missing.
gen ost = (ao_kreis > 11000) if ao_kreis != .
* disabled alternative kept from the original:
*gen ost = (int(ao_gem/1000)>11000)
tabulate ost, missing

* East observations before 1992 are not used
drop if year < 1992 & ost == 1


* Generate commuting zones by running the AMR50 helper do-file.
* The path uses forward slashes only: Stata accepts them as directory
* separators on every platform, whereas a backslash only works on Windows
* and can also interfere with macro expansion.
do "do-files/data_management/cr_generate-AMR50_ado.do"

* Print a header line and detailed summary statistics of the workplace
* municipality code.
* NOTE(review): the local macro y is not defined in the visible part of
* this file; presumably it is set by a calling loop over years. If it is
* undefined it expands to an empty string. Confirm against the caller.
dis "sum ao_gem, year: `y'"
sum ao_gem, d


/**************************************************************
Wages: lower and upper bound
**************************************************************/

* Lower bound
****************************
* Conservative censoring limit, a bit higher than the IAB one.
* The daily wage variable is renamed to entgelt and, for years up to 1998,
* divided by 1.95583.
* NOTE(review): presumably this converts DM amounts to euro; confirm that
* the raw wage is reported in DM through 1998.
rename tag_entg entgelt
replace entgelt = entgelt/1.95583 if year <= 1998

* Wages at or below the floor of their year are set to that floor.
* Every statement is restricted to its own years (and region), so the
* statements do not interact and their order does not matter.

* 1980-1991: one floor for all observations
replace entgelt = 7 if inlist(year, 1980, 1981, 1982, 1983, 1984, 1985, 1986) & entgelt <= 7
replace entgelt = 8 if inlist(year, 1987, 1988, 1989, 1990) & entgelt <= 8
replace entgelt = 9 if year == 1991 & entgelt <= 9

* 1992-1998, West (ost == 0)
replace entgelt = 9  if ost == 0 & inlist(year, 1992, 1993) & entgelt <= 9
replace entgelt = 10 if ost == 0 & inlist(year, 1994, 1995, 1996) & entgelt <= 10
replace entgelt = 11 if ost == 0 & inlist(year, 1997, 1998) & entgelt <= 11

* 1992-1998, East (ost == 1)
replace entgelt = 6 if ost == 1 & year == 1992 & entgelt <= 6
replace entgelt = 7 if ost == 1 & year == 1993 & entgelt <= 7
replace entgelt = 8 if ost == 1 & inlist(year, 1994, 1995) & entgelt <= 8
replace entgelt = 9 if ost == 1 & inlist(year, 1996, 1997, 1998) & entgelt <= 9

* 1999-2000: one floor for all observations again
replace entgelt = 11 if inlist(year, 1999, 2000) & entgelt <= 11


* Indicator for left censoring.
* left is 1 when the wage is at or below the floor of its year (and of its
* region in 1992-1998), 0 otherwise, and missing when the wage is
* system-missing. The thresholds repeat the floors applied to entgelt.
gen left = 0 if entgelt != .

* 1980-1991: one floor for all observations
replace left = 1 if inlist(year, 1980, 1981, 1982, 1983, 1984, 1985, 1986) & entgelt <= 7
replace left = 1 if inlist(year, 1987, 1988, 1989, 1990) & entgelt <= 8
replace left = 1 if year == 1991 & entgelt <= 9

* 1992-1998, West (ost == 0)
replace left = 1 if ost == 0 & inlist(year, 1992, 1993) & entgelt <= 9
replace left = 1 if ost == 0 & inlist(year, 1994, 1995, 1996) & entgelt <= 10
replace left = 1 if ost == 0 & inlist(year, 1997, 1998) & entgelt <= 11

* 1992-1998, East (ost == 1)
replace left = 1 if ost == 1 & year == 1992 & entgelt <= 6
replace left = 1 if ost == 1 & year == 1993 & entgelt <= 7
replace left = 1 if ost == 1 & inlist(year, 1994, 1995) & entgelt <= 8
replace left = 1 if ost == 1 & inlist(year, 1996, 1997, 1998) & entgelt <= 9

* 1999-2000: one floor for all observations
replace left = 1 if inlist(year, 1999, 2000) & entgelt <= 11


* Drop wage observations at the lower censoring limit.
* The limit is relaxed slightly: every observation whose log wage lies
* less than 0.0005 above the smallest log wage of its ost group is
* removed, to account for wage increases that push individuals just above
* the censoring threshold.
* NOTE(review): the minimum is taken within ost only, not within year;
* presumably the file is run on one year at a time. Confirm.
gen lw = ln(entgelt)
bysort ost: egen minw = min(lw)

* a missing log wage never satisfies the comparison, so it is kept
drop if lw < minw + 0.0005

* the helper variables are not needed any further
drop minw lw

* Upper bound
*********************************
* The censoring limit varies by East and West from 1992 onwards.
* Disabled earlier approach based on a limit variable climr, kept from the
* original:
/*bysort ost: egen aux=max(climr)
replace climr=aux if climr==.
drop aux

replace entgelt=climr if entgelt>=climr
*/

* Wages at or above the ceiling of their year (and region) are set to the
* ceiling. System-missing wages are excluded explicitly because a missing
* value compares greater than every number. Every statement is restricted
* to its own year and region, so the order of the statements is irrelevant.

* 1980-1991: one ceiling for all observations
replace entgelt = 68  if year == 1980 & entgelt != . & entgelt >= 68
replace entgelt = 72  if year == 1981 & entgelt != . & entgelt >= 72
replace entgelt = 76  if year == 1982 & entgelt != . & entgelt >= 76
replace entgelt = 80  if year == 1983 & entgelt != . & entgelt >= 80
replace entgelt = 85  if year == 1984 & entgelt != . & entgelt >= 85
replace entgelt = 89  if year == 1985 & entgelt != . & entgelt >= 89
replace entgelt = 92  if year == 1986 & entgelt != . & entgelt >= 92
replace entgelt = 93  if year == 1987 & entgelt != . & entgelt >= 93
replace entgelt = 97  if year == 1988 & entgelt != . & entgelt >= 97
replace entgelt = 99  if year == 1989 & entgelt != . & entgelt >= 99
replace entgelt = 103 if year == 1990 & entgelt != . & entgelt >= 103
replace entgelt = 107 if year == 1991 & entgelt != . & entgelt >= 107

* 1992-2000, West (ost == 0)
replace entgelt = 111 if ost == 0 & year == 1992 & entgelt != . & entgelt >= 111
replace entgelt = 119 if ost == 0 & year == 1993 & entgelt != . & entgelt >= 119
replace entgelt = 125 if ost == 0 & year == 1994 & entgelt != . & entgelt >= 125
replace entgelt = 129 if ost == 0 & year == 1995 & entgelt != . & entgelt >= 129
replace entgelt = 132 if ost == 0 & year == 1996 & entgelt != . & entgelt >= 132
replace entgelt = 135 if ost == 0 & year == 1997 & entgelt != . & entgelt >= 135
replace entgelt = 139 if ost == 0 & year == 1998 & entgelt != . & entgelt >= 139
replace entgelt = 140 if ost == 0 & year == 1999 & entgelt != . & entgelt >= 140
replace entgelt = 142 if ost == 0 & year == 2000 & entgelt != . & entgelt >= 142

* 1992-2000, East (ost == 1); the values for 1998 and 2000 are lower than
* those of the preceding year, exactly as in the original table
replace entgelt = 78  if ost == 1 & year == 1992 & entgelt != . & entgelt >= 78
replace entgelt = 87  if ost == 1 & year == 1993 & entgelt != . & entgelt >= 87
replace entgelt = 97  if ost == 1 & year == 1994 & entgelt != . & entgelt >= 97
replace entgelt = 105 if ost == 1 & year == 1995 & entgelt != . & entgelt >= 105
replace entgelt = 111 if ost == 1 & year == 1996 & entgelt != . & entgelt >= 111
replace entgelt = 117 if ost == 1 & year == 1997 & entgelt != . & entgelt >= 117
replace entgelt = 115 if ost == 1 & year == 1998 & entgelt != . & entgelt >= 115
replace entgelt = 119 if ost == 1 & year == 1999 & entgelt != . & entgelt >= 119
replace entgelt = 117 if ost == 1 & year == 2000 & entgelt != . & entgelt >= 117


* Right-censoring indicator.
* cens is 1 when the log wage lies within 0.001 of the largest log wage of
* its ost group, and 0 otherwise. lw is kept in the data; only the helper
* maxw is dropped.
gen lw = ln(entgelt)
bysort ost: egen maxw = max(lw)

* The indicator is left missing whenever the log wage is missing. A missing
* value compares greater than every number, so without this condition an
* observation with a missing lw would be flagged as censored. Checking lw
* rather than entgelt == . also covers wages that are zero, negative or
* extended-missing, for which ln() returns missing as well.
gen cens = (lw > maxw - 0.001) if !missing(lw)
label variable cens "wage is censored"
drop maxw

* Additional firm-size variables derived from estsize:
*   fsize    copy of estsize
*   fsize2   squared firm size divided by 10
*   fsize11  1 if firm size is at least 11, 0 if it is below 11
gen fsize = estsize
gen fsize2 = fsize^2/10

* A missing firm size compares greater than every number, so an
* unconditional (fsize >= 11) would code missing sizes as 1. The indicator
* is left missing in that case instead.
gen fsize11 = (fsize >= 11) if !missing(fsize)

